# Replication files for JOP article:
# Beyond the glass ceiling, more `housework'? 
# Womens' work assignment, performance and influence in political institutions


# By Louisa Boulaziz 


# Description of script: This script includes all the alternative analysis 
# for the duration models as well as descriptive statistics, APPENDIX B.

#----------------------------------------------------------------------
# PACKAGES

library(tidyverse)
library(gridExtra)
library(stargazer)
library(mclogit)
library(survival)
library(survminer)
library(coxed)
library(eha)
library(MASS)
library(lmtest)
library(multiwayvcov)
library(marginaleffects)


#----------------------------------------------------------------------
# REPLICATING MODELS FROM APPENDIX B1, TABLE 9 AND 10 

load("time_data4.Rdata")

time_data2 <- ddd
duration00 <- glm.nb(duration_days ~
                       is_female+
                       # MS fixed effects 
                       member_state,
                     data = time_data2
)


duration00$ses  <- coeftest(duration00,
                            vcov. = cluster.vcov(duration00,
                                                 cluster = time_data2$iuropa_decision_id))[,2]


duration00$ps  <- coeftest(duration00,
                           vcov. = cluster.vcov(duration00,
                                                cluster = time_data2$decision_year))[,4]

### Without ms effects 

duration00_ms <- glm.nb(duration_days ~
                          is_female,
                        # MS fixed effects 
                        #member_state,
                        data = time_data2
)


duration00_ms$ses  <- coeftest(duration00_ms,
                               vcov. = cluster.vcov(duration00_ms,
                                                    cluster = time_data2$iuropa_decision_id))[,2]


duration00_ms$ps  <- coeftest(duration00_ms,
                              vcov. = cluster.vcov(duration00_ms,
                                                   cluster = time_data2$decision_year))[,4]

############# Bivariate + salience

duration0 <- glm.nb(duration_days ~
                      is_female * salience +
                      # MS fixed effects 
                      member_state,
                    data = time_data2
)


duration0$ses  <- coeftest(duration0,
                           vcov. = cluster.vcov(duration0,
                                                cluster = time_data2$iuropa_decision_id))[,2]


duration0$ps  <- coeftest(duration0,
                          vcov. = cluster.vcov(duration0,
                                               cluster = time_data2$decision_year))[,4]

### Without ms effects 

duration0_ms <- glm.nb(duration_days ~
                         is_female * salience, 
                       # MS fixed effects 
                       # member_state,
                       data = time_data2
)


duration0_ms$ses  <- coeftest(duration0_ms,
                              vcov. = cluster.vcov(duration0_ms,
                                                   cluster = time_data2$iuropa_decision_id))[,2]


duration0_ms$ps  <- coeftest(duration0_ms,
                             vcov. = cluster.vcov(duration0_ms,
                                                  cluster = time_data2$decision_year))[,4]

########### Model without salience

duration1 <- glm.nb(duration_days ~
                      is_female + salience + 
                      # Background 
                      former_gc_judge + former_ag + was_judge + was_academic + 
                      was_civil_servant + was_lawyer + was_politician + 
                      # Judge experience in court and age 
                      age_judge_when_case_decided + years_in_court+ decisions_per_year + 
                      # Case complexity 
                      count_policy_areas + count_cited_documents + hearing +# count_policy_areas + #count_judges +
                      # Type of case 
                      decision_type +  test_policy_area, # + 
                    # MS fixed effects 
                    #member_state,
                    data = time_data2
)


duration1$ses  <- coeftest(duration1,
                           vcov. = cluster.vcov(duration1,
                                                cluster = time_data2$iuropa_decision_id))[,2]


duration1$ps  <- coeftest(duration1,
                          vcov. = cluster.vcov(duration1,
                                               cluster = time_data2$decision_year))[,4]


############# Models without salience and MS effects 

duration1_ms <- glm.nb(duration_days ~
                         is_female + salience + 
                         # Background 
                         former_gc_judge + former_ag + was_judge + was_academic + 
                         was_civil_servant + was_lawyer + was_politician + 
                         # Judge experience in court and age 
                         age_judge_when_case_decided + years_in_court+ decisions_per_year + 
                         # Case complexity 
                         count_policy_areas + count_cited_documents + hearing +# count_policy_areas + #count_judges +
                         # Type of case 
                         decision_type +  test_policy_area + 
                         # MS fixed effects 
                         member_state,
                       data = time_data2
)


duration1_ms$ses  <- coeftest(duration1_ms,
                              vcov. = cluster.vcov(duration1_ms,
                                                   cluster = time_data2$iuropa_decision_id))[,2]


duration1_ms$ps  <- coeftest(duration1_ms,
                             vcov. = cluster.vcov(duration1_ms,
                                                  cluster = time_data2$decision_year))[,4]


##### Salience 

duration2 <- glm.nb(duration_days ~
                      is_female * salience + 
                      # Background 
                      former_gc_judge + former_ag + was_judge + was_academic + 
                      was_civil_servant + was_lawyer + was_politician + 
                      # Judge experience in court and age 
                      age_judge_when_case_decided + years_in_court+ decisions_per_year + 
                      # Case complexity 
                      count_policy_areas + count_cited_documents + hearing +# count_policy_areas + #count_judges +
                      # Type of case 
                      decision_type +  test_policy_area+ 
                      # MS fixed effects 
                      member_state,
                    data = time_data2
)



duration2$ses  <- coeftest(duration2,
                           vcov. = cluster.vcov(duration2,
                                                cluster = time_data2$iuropa_decision_id))[,2]


duration2$ps  <- coeftest(duration2,
                          vcov. = cluster.vcov(duration2,
                                               cluster = time_data2$decision_year))[,4]


#### Salience without MS fixed effects 

duration2_ms <-  glm.nb(duration_days ~
                          is_female * salience + 
                          # Background 
                          former_gc_judge + former_ag + was_judge + was_academic + 
                          was_civil_servant + was_lawyer + was_politician + 
                          # Judge experience in court and age 
                          age_judge_when_case_decided + years_in_court+ decisions_per_year + 
                          # Case complexity 
                          count_policy_areas + count_cited_documents + hearing +# count_policy_areas + #count_judges +
                          # Type of case 
                          decision_type +  test_policy_area ,
                        # MS fixed effects 
                        # member_state,
                        data = time_data2
)



duration2_ms$ses  <- coeftest(duration2_ms,
                              vcov. = cluster.vcov(duration2_ms,
                                                   cluster = time_data2$iuropa_decision_id))[,2]


duration2_ms$ps  <- coeftest(duration2_ms,
                             vcov. = cluster.vcov(duration2_ms,
                                                  cluster = time_data2$decision_year))[,4]


# TABLE 9

stargazer(duration00_ms, duration00, duration1, duration1_ms, type = "text", omit = c("member_state", "test_policy_area"), dep.var.caption= "Duration days",
          dep.var.labels = "",
          single.row =T, 
          omit.stat = c("rsq", "ll", "logrank", "wald", "max.rsq", "lr"),
          # dep.var.labels = c("(1) All votes          (2) Split votes"),
          # model.names = TRUE,
          # model.numbers = F,
          covariate.labels = c("Female judge", 
                               "Case importance", 
                               "Former GC judge", 
                               "Former AG", 
                               "Former judge", 
                               "Former academic", 
                               "Former civil servant", 
                               "Former lawyer", 
                               "Former politician", 
                               "Age", 
                               "Years in court", 
                               "Workload",
                               "Count policy areas",
                               "Count cited documents", 
                               "Case with hearing",
                               "Procedure, Preliminary ruling" 
                               #"Female judge x Case importance"
          ),
          add.lines=list(c("Member state fixed effects",
                           "No","Yes", "No", "Yes"), 
                         c("Policy areas dummies","No", "No", "Yes", "Yes")), 
          header = FALSE, 
          font.size = "scriptsize", 
          title = "Effect of gender on case duration, with and without member state fixed effects")

# TABLE 10


stargazer(duration0_ms, duration0, duration2_ms, duration2, type = "text", omit = c("member_state", "test_policy_area"), dep.var.caption= "Duration days",
          dep.var.labels = "",
          single.row =T, 
          omit.stat = c("rsq", "ll", "logrank", "wald", "max.rsq", "lr"),
          # dep.var.labels = c("(1) All votes          (2) Split votes"),
          # model.names = TRUE,
          # model.numbers = F,
          covariate.labels = c("Female judge", 
                               "Case importance", 
                               "Former GC judge", 
                               "Former AG", 
                               "Former judge", 
                               "Former academic", 
                               "Former civil servant", 
                               "Former lawyer", 
                               "Former politician", 
                               "Age", 
                               "Years in court", 
                               "Workload",
                               "Count policy areas",
                               "Count cited documents", 
                               "Case with hearing",
                               "Procedure, Preliminary ruling", 
                               "Female judge x Case importance"
          ),
          add.lines=list(c("Member state fixed effects",
                           "No","Yes", "No", "Yes"), 
                         c("Policy areas dummies","No", "No", "Yes", "Yes")), 
          header = FALSE, 
          font.size = "scriptsize", 
          title = "Effect of gender on case duration, with and without member state fixed effects")

#----------------------------------------------------------------------
# REPLICATING MODELS FROM APPENDIX B2, TABLE 11

table(time_data2$count_judges)


time_data2$salience1 <- ifelse(time_data2$count_judges > 5, 2,  ifelse(time_data2$count_judges == 5, 1, ifelse(time_data2$count_judges < 5, 0, time_data2$count_judges)))

table(time_data2$salience1)
time_data2$salience1 <- as.factor(time_data2$salience1)
# Running models 


duration0 <- glm.nb(duration_days ~
                      is_female * salience1 +
                      # MS fixed effects 
                      member_state,
                    data = time_data2
)


duration0$ses  <- coeftest(duration0,
                           vcov. = cluster.vcov(duration0,
                                                cluster = time_data2$iuropa_decision_id))[,2]


duration0$ps  <- coeftest(duration0,
                          vcov. = cluster.vcov(duration0,
                                               cluster = time_data2$decision_year))[,4]



duration1 <- glm.nb(duration_days ~
                      is_female * salience1 + 
                      # Background 
                      former_gc_judge + former_ag + was_judge + was_academic + 
                      was_civil_servant + was_lawyer + was_politician + 
                      # Judge experience in court and age 
                      age_judge_when_case_decided + years_in_court+ decisions_per_year + 
                      # Case complexity 
                      count_policy_areas + count_cited_documents + hearing +# count_policy_areas + #count_judges +
                      # Type of case 
                      decision_type +  test_policy_area, # + 
                    # MS fixed effects 
                    #member_state,
                    data = time_data2
)


duration1$ses  <- coeftest(duration1,
                           vcov. = cluster.vcov(duration1,
                                                cluster = time_data2$iuropa_decision_id))[,2]


duration1$ps  <- coeftest(duration1,
                          vcov. = cluster.vcov(duration1,
                                               cluster = time_data2$decision_year))[,4]


duration2 <- glm.nb(duration_days ~
                      is_female * salience1 + 
                      # Background 
                      former_gc_judge + former_ag + was_judge + was_academic + 
                      was_civil_servant + was_lawyer + was_politician + 
                      # Judge experience in court and age 
                      age_judge_when_case_decided + years_in_court+ decisions_per_year + 
                      # Case complexity 
                      count_policy_areas + count_cited_documents + hearing +# count_policy_areas + #count_judges +
                      # Type of case 
                      decision_type +  test_policy_area + 
                      # MS fixed effects 
                      member_state,
                    data = time_data2
)



duration2$ses  <- coeftest(duration2,
                           vcov. = cluster.vcov(duration2,
                                                cluster = time_data2$iuropa_decision_id))[,2]


duration2$ps  <- coeftest(duration2,
                          vcov. = cluster.vcov(duration2,
                                               cluster = time_data2$decision_year))[,4]


# Making table 11

stargazer( duration0, duration1, duration2, type = "text", omit = c("member_state", "test_policy_area"), dep.var.caption= "Duration days",
           dep.var.labels = "",
           single.row =T, 
           omit.stat = c("rsq", "ll", "logrank", "wald", "max.rsq", "lr"),
           # dep.var.labels = c("(1) All votes          (2) Split votes"),
           # model.names = TRUE,
           # model.numbers = F,
           covariate.labels = c("Female judge", 
                                "N judges = 5",
                                "N judges > 5",
                                "Former GC judge", 
                                "Former AG", 
                                "Former judge", 
                                "Former academic", 
                                "Former civil servant", 
                                "Former lawyer", 
                                "Former politician", 
                                "Age", 
                                "Years in court",
                                "Workload",
                                "Count policy areas",
                                "Count cited documents", 
                                "Case with hearing",
                                # "Count policy areas",
                                "Preliminary ruling", 
                                "Female judge x N judges = 5",
                                "Female judge x N judges > 5"
                                
           ),
           add.lines=list(c("Member state fixed effects",
                            "Yes", "No", "Yes"), 
                          c("Policy areas dummies","No", "Yes", "Yes")), 
           header = FALSE, 
           font.size = "scriptsize", 
           title = "Effect of gender on case duration, using an alternative measure of case importance")

#----------------------------------------------------------------------
# REPLICATING MODELS FROM APPENDIX B3, TABLE 12 

# Loading in the right data

load("with_new_DV_time_data.Rdata")

duration00 <- glm.nb(duration_to_write_days ~
                       is_female+
                       # MS fixed effects 
                       member_state,
                     data = df
)


duration00$ses  <- coeftest(duration00,
                            vcov. = cluster.vcov(duration00,
                                                 cluster = df$iuropa_decision_id))[,2]


duration00$ps  <- coeftest(duration00,
                           vcov. = cluster.vcov(duration00,
                                                cluster = df$decision_year))[,4]

duration0 <- glm.nb(duration_to_write_days ~
                      is_female * salience +
                      # MS fixed effects 
                      member_state,
                    data = df
)


duration0$ses  <- coeftest(duration0,
                           vcov. = cluster.vcov(duration0,
                                                cluster = df$iuropa_decision_id))[,2]


duration0$ps  <- coeftest(duration0,
                          vcov. = cluster.vcov(duration0,
                                               cluster = df$decision_year))[,4]



duration1 <- glm.nb(duration_to_write_days ~
                      is_female + salience +
                      # Background 
                      former_gc_judge + former_ag + was_judge + was_academic + 
                      was_civil_servant + was_lawyer + was_politician + 
                      # Judge experience in court and age 
                      age_judge_when_case_decided +years_in_court+ decisions_per_year +
                      # Case complexity 
                      count_cited_documents + # count_policy_areas + #count_judges +
                      # Type of case 
                      decision_type +  test_policy_area + 
                      # MS fixed effects 
                      member_state,
                    data = df
)


duration1$ses  <- coeftest(duration1,
                           vcov. = cluster.vcov(duration1,
                                                cluster = df$iuropa_decision_id))[,2]


duration1$ps  <- coeftest(duration1,
                          vcov. = cluster.vcov(duration1,
                                               cluster = df$decision_year))[,4]


duration2 <- glm.nb(duration_to_write_days ~
                      is_female*salience + 
                      # Background 
                      former_gc_judge + former_ag + was_judge + was_academic + 
                      was_civil_servant + was_lawyer + was_politician + 
                      # Judge experience in court and age 
                      age_judge_when_case_decided +years_in_court+  decisions_per_year +
                      # Case complexity 
                      count_cited_documents + # count_policy_areas + #count_judges +
                      # Type of case 
                      decision_type + test_policy_area+
                      # MS fixed effects 
                      member_state, 
                    data = df
)


duration2$ses  <- coeftest(duration2,
                           vcov. = cluster.vcov(duration2,
                                                cluster = df$iuropa_decision_id))[,2]


duration2$ps  <- coeftest(duration2,
                          vcov. = cluster.vcov(duration2,
                                               cluster = df$decision_year))[,4]

# Making table 

stargazer(duration00, duration0, duration1, duration2, type = "text", omit = c("member_state", "test_policy_area"), dep.var.caption= "Duration days",
          dep.var.labels = "",
          single.row =T, 
          omit.stat = c("rsq", "ll", "logrank", "wald", "max.rsq", "lr"),
          # dep.var.labels = c("(1) All votes          (2) Split votes"),
          # model.names = TRUE,
          # model.numbers = F,
          covariate.labels = c("Female judge", 
                               "Case importance", 
                               "Former GC judge", 
                               "Former AG", 
                               "Former judge", 
                               "Former academic", 
                               "Former civil servant", 
                               "Former lawyer", 
                               "Former politician", 
                               "Age", 
                               "Years in court",
                               "Workload",
                               "Count cited documents", 
                               # "Count policy areas",
                               "Preliminary ruling",  
                               "Female x Case importance"
          ),
          add.lines=list(c("Member state fixed effects",
                           "Yes","Yes", "Yes", "Yes"), 
                         c("Policy areas dummies","No", "No", "Yes", "Yes")), 
          header = FALSE, 
          font.size = "scriptsize", 
          title = "Effect of gender and case importance on case duration, where case duration is measured as the number of days from the last day of the hearing until decision date.")


#----------------------------------------------------------------------
# REPLICATING FIGURE FROM APPENDIX B3, FIGURE 11

p <- avg_predictions(duration1, by = "is_female")

model_effects1 <- ggplot(p, aes(x = as.factor(is_female), y = estimate, ymin = conf.low, ymax = conf.high)) +
  geom_pointrange() + 
  coord_flip() + 
  scale_x_discrete(labels=c("1" = "Female judge", "0" = "Male judge"))+
  xlab("") + 
  ylab("Case duration in days") + 
  ggtitle("Gender difference in case duration for all cases") + 
  theme_classic()

model_effects1
## Model 4 

p1 <- avg_predictions(duration2, by = c("is_female", "salience"))


p1 <- p1 %>% 
  filter(salience == 1) 

model_effects2 <- ggplot(p1, aes(x = as.factor(is_female), y = estimate, ymin = conf.low, ymax = conf.high)) + 
  geom_pointrange() + 
  coord_flip()+
  scale_x_discrete(labels=c("1" = "Female judge", "0" = "Male judge"))+
  xlab("") + 
  ylab("Case duration in days") + 
  ggtitle("Gender difference in case duration for important cases only") + 
  theme_classic()
model_effects2

#----------------------------------------------------------------------
# REPLICATING TABLE FROM APPENDIX B4, TABLE 13

time_data2$hearing <- as.numeric(time_data2$hearing)

to_show <- time_data2 %>% 
  dplyr::select(is_female, salience,  former_gc_judge, 
                former_ag, was_judge, was_academic, 
                was_civil_servant, was_lawyer, 
                was_politician, age_judge_when_case_decided, 
                years_in_court, decisions_per_year, count_policy_areas, count_cited_documents, 
                hearing,
                decision_type, test_policy_area, member_state)

stargazer(to_show, type = "text", header = FALSE, 
          covariate.labels = c("Female judge", 
                               "Case importance",
                               "Former GC judge", 
                               "Former AG", 
                               "Former judge",
                               "Former academic", 
                               "Former civil servant", 
                               "Fomer lawyer", 
                               "Former politician",
                               "Age when case decided", 
                               "Years in court",
                               "Workload", 
                               "Count policy areas",
                               "Count cited documents",
                               "Case with hearing" ), 
          title = "Distribution of continous and binary variables from the analysis. Variables at the case level.")
